一、需求
项目中需要把体积过大的PDF文件压缩成较小的PDF文件。借助iText的API可以实现这一需求:在尽量保证文件不失真的前提下,缩小PDF中内嵌的图片,从而将PDF大文件压缩成小文件。
二、代码
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.*;
import com.itextpdf.text.pdf.parser.PdfImageObject;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Demo that shrinks a PDF by downscaling every image embedded in it and
 * re-encoding the result as JPEG, using the iText 5 low-level API.
 */
public class TestReduceImgDemoSuccess {
    /**
     * Multiplication factor applied to both the width and the height of each image.
     */
    public static float FACTOR = 0.5f;

    /**
     * Reads the PDF at {@code src}, replaces each image stream with a copy scaled
     * by {@link #FACTOR} and encoded as JPEG, and writes the result to {@code dest}.
     *
     * <p>Images that cannot be decoded to a {@link BufferedImage}, that would scale
     * down to less than one pixel in either dimension, or that cannot be encoded
     * as JPEG are left untouched.
     *
     * @param src  path of the source PDF file
     * @param dest path of the PDF file to create
     * @throws IOException       if reading the source, decoding/encoding an image,
     *                           or writing the destination fails
     * @throws DocumentException if iText fails while writing the altered document
     */
    public void manipulatePdf(String src, String dest) throws IOException, DocumentException {
        // Custom marker entry written into every image dictionary this method rewrites.
        PdfName key = new PdfName("ITXT_SpecialId");
        PdfName value = new PdfName("123456789");

        PdfReader reader = new PdfReader(src);
        try {
            int n = reader.getXrefSize();
            // Walk every object in the cross-reference table looking for image streams.
            for (int i = 0; i < n; i++) {
                PdfObject object = reader.getPdfObject(i);
                if (object == null || !object.isStream()) {
                    continue;
                }
                PRStream stream = (PRStream) object;
                if (!PdfName.IMAGE.equals(stream.get(PdfName.SUBTYPE))) {
                    continue;
                }

                BufferedImage original = new PdfImageObject(stream).getBufferedImage();
                if (original == null) {
                    continue;
                }
                int width = (int) (original.getWidth() * FACTOR);
                int height = (int) (original.getHeight() * FACTOR);
                // A BufferedImage cannot be created with a zero dimension; tiny
                // images are kept as they are instead of aborting the whole run.
                if (width < 1 || height < 1) {
                    continue;
                }

                byte[] jpeg = encodeAsJpeg(scale(original, width, height));
                if (jpeg == null) {
                    // No JPEG writer accepted the image: keep the original stream.
                    continue;
                }

                // Replace the stream's dictionary and data wholesale.
                // NOTE(review): entries the original image carried (for example a
                // soft mask) are not restored after clear() - confirm that is acceptable.
                stream.clear();
                // The data is already JPEG-compressed, so iText must not deflate it again.
                stream.setData(jpeg, false, PRStream.BEST_COMPRESSION);
                stream.put(PdfName.TYPE, PdfName.XOBJECT);
                stream.put(PdfName.SUBTYPE, PdfName.IMAGE);
                stream.put(key, value);
                stream.put(PdfName.FILTER, PdfName.DCTDECODE);
                stream.put(PdfName.WIDTH, new PdfNumber(width));
                stream.put(PdfName.HEIGHT, new PdfNumber(height));
                stream.put(PdfName.BITSPERCOMPONENT, new PdfNumber(8));
                stream.put(PdfName.COLORSPACE, PdfName.DEVICERGB);
            }

            // Save the altered PDF; the output file is closed even if stamping fails.
            FileOutputStream out = new FileOutputStream(dest);
            try {
                PdfStamper stamper = new PdfStamper(reader, out);
                stamper.close();
            } finally {
                out.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Draws {@code source} scaled by {@link #FACTOR} into a new RGB image of the
     * given size.
     */
    private static BufferedImage scale(BufferedImage source, int width, int height) {
        BufferedImage scaled = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
        AffineTransform at = AffineTransform.getScaleInstance(FACTOR, FACTOR);
        Graphics2D g = scaled.createGraphics();
        try {
            g.drawRenderedImage(source, at);
        } finally {
            // Release the native resources held by the graphics context.
            g.dispose();
        }
        return scaled;
    }

    /**
     * Encodes {@code image} as JPEG; returns {@code null} when ImageIO reports
     * that no suitable writer was found.
     */
    private static byte[] encodeAsJpeg(BufferedImage image) throws IOException {
        ByteArrayOutputStream imgBytes = new ByteArrayOutputStream();
        if (!ImageIO.write(image, "JPG", imgBytes)) {
            return null;
        }
        return imgBytes.toByteArray();
    }

    /**
     * Main method.
     *
     * @param args no arguments needed
     * @throws DocumentException if iText fails while writing the altered document
     * @throws IOException       if reading or writing a file fails
     */
    public static void main(String[] args) throws IOException, DocumentException {
        new TestReduceImgDemoSuccess().manipulatePdf("源文件存储的路径,例如D:/xxx/xx.pdf",
                "目标文件的存储路径,例如D:/xxx/result.pdf");
    }
}
三、出现的问题
此代码的原理简单来说就是遍历PDF文件中的每个图片流,把图片按比例缩小并重新编码为JPEG,然后再把新的图片流写回PDF文件,从而实现将大PDF文件压缩成小PDF文件。但是如果图片本身过小,压缩后可能会出现失真。解决办法为在压缩文件之前,判断一下每个图片文件流的大小,如果过于小(此处假设为500k,可根据业务需要进行设定),就不需要再压缩,只对过大的图片进行压缩。
四、解决办法
在上面代码的标记处,修改为如下代码:
// Replace the image only when the downscaled raster is large enough; smaller
// images are left untouched so that they are not degraded any further.
// DataBuffer.getSize() counts array elements, not bytes: img is created as
// TYPE_INT_RGB in the surrounding method, so the value compared against the
// threshold (tune it to the business need) is presumably the pixel count of
// the scaled image rather than the byte size of the stream in the PDF.
// getRaster() is used instead of getData() to avoid copying the whole raster
// just to read its size.
if (img.getRaster().getDataBuffer().getSize() > 512000) {
    ByteArrayOutputStream imgBytes = new ByteArrayOutputStream();
    // ImageIO.write returns false when no JPEG writer accepts the image; in
    // that case the original stream is kept instead of being overwritten
    // with empty data.
    if (ImageIO.write(img, "JPG", imgBytes)) {
        stream.clear();
        // The data is already JPEG-compressed, so iText must not deflate it again.
        stream.setData(imgBytes.toByteArray(), false, PRStream.BEST_COMPRESSION);
        stream.put(PdfName.TYPE, PdfName.XOBJECT);
        stream.put(PdfName.SUBTYPE, PdfName.IMAGE);
        stream.put(key, value);
        stream.put(PdfName.FILTER, PdfName.DCTDECODE);
        stream.put(PdfName.WIDTH, new PdfNumber(width));
        stream.put(PdfName.HEIGHT, new PdfNumber(height));
        stream.put(PdfName.BITSPERCOMPONENT, new PdfNumber(8));
        stream.put(PdfName.COLORSPACE, PdfName.DEVICERGB);
    }
}
// No else branch: encoding an image whose bytes are never stored in the PDF
// would only waste time and memory.